import numpy as np
import pandas as pd
import FinanceDataReader as fdr7 7. Pandas Practice
7.1 Practice 1 : Magic formula investing
krx = fdr.StockListing('KRX')
krx.iloc[:, :4]| Code | ISU_CD | Name | Market | |
|---|---|---|---|---|
| 0 | 005930 | KR7005930003 | 삼성전자 | KOSPI |
| 1 | 000660 | KR7000660001 | SK하이닉스 | KOSPI |
| 2 | 373220 | KR7373220003 | LG에너지솔루션 | KOSPI |
| 3 | 005380 | KR7005380001 | 현대차 | KOSPI |
| 4 | 207940 | KR7207940008 | 삼성바이오로직스 | KOSPI |
| ... | ... | ... | ... | ... |
| 2805 | 002995 | KR7002991008 | 금호건설우 | KOSPI |
| 2806 | 266170 | KR7266170000 | 뿌리깊은나무들 | KONEX |
| 2807 | 217320 | KR7217320001 | 썬테크 | KONEX |
| 2808 | 245450 | KR7245450002 | 씨앤에스링크 | KONEX |
| 2809 | 308700 | KR7308700004 | 테크엔 | KONEX |
2810 rows × 4 columns
df = pd.read_csv('investment_hw/PER_ROA.csv')
df.iloc[:5, [0, 3, 5, 6, 7, 8, 9]]| 종목명 | 등락률 | 거래량 | 시가총액 | 영업이익 | PER | ROA | |
|---|---|---|---|---|---|---|---|
| 0 | AJ네트웍스 | -1.48% | 27984 | 2177 | -213.0 | 4.14 | 2.18 |
| 1 | AJ렌터카 | -0.47% | 50886 | 2325 | 218.0 | 61.40 | 0.28 |
| 2 | AK홀딩스 | -1.99% | 15535 | 4564 | 2697.0 | 4.27 | 6.52 |
| 3 | ARIRANG 200 | -0.10% | 234206 | 7572 | NaN | NaN | NaN |
| 4 | ARIRANG 200동일가중 | -0.19% | 31 | 46 | NaN | NaN | NaN |
df = df.loc [ df.isnull().sum(axis=1) == 0, : ]per = per_val = df['PER']
per_val[ per < 0 ] = np.nan
per_rank = per_val.rank( ascending=True, na_option='bottom')
per_rank.head()0 35.0
1 519.0
2 38.0
50 222.0
51 408.0
Name: PER, dtype: float64
roa= roa_val = df['ROA']
roa_val[roa < 0] = np.nan
roa_rank = roa_val.rank( ascending=False, na_option='bottom')
roa_rank.head()0 355.0
1 556.0
2 110.0
50 239.5
51 37.0
Name: ROA, dtype: float64
result = per_rank + roa_rank
result_rank = result.rank( ascending=True, na_option='bottom')
result_rank[ result_rank > 10 ] = 0
result_rank [ result_rank > 0 ] = 1
result_rank0 0.0
1 0.0
2 0.0
50 0.0
51 0.0
...
1522 0.0
1523 0.0
1524 0.0
1526 0.0
1529 0.0
Length: 758, dtype: float64
result_rank.sum()10.0
mf_df = df.loc[ result_rank > 0 , ['종목명', '시가총액']].copy()
mf_df| 종목명 | 시가총액 | |
|---|---|---|
| 96 | HDC | 7468 |
| 440 | SIMPAC | 1835 |
| 459 | SK하이닉스 | 541634 |
| 749 | 대한유화 | 7508 |
| 1040 | 세아제강지주 | 2044 |
| 1057 | 신대양제지 | 2756 |
| 1157 | 에쓰씨엔지니어링 | 354 |
| 1298 | 케이씨 | 1694 |
| 1443 | 한일홀딩스 | 2904 |
| 1513 | 효성 | 18353 |
mf_stock_list = df.loc[ result_rank > 0, '종목명'].values
mf_stock_listarray(['HDC', 'SIMPAC', 'SK하이닉스', '대한유화', '세아제강지주', '신대양제지', '에쓰씨엔지니어링',
'케이씨', '한일홀딩스', '효성'], dtype=object)
krx.iloc[:,:4].head()| Code | ISU_CD | Name | Market | |
|---|---|---|---|---|
| 0 | 005930 | KR7005930003 | 삼성전자 | KOSPI |
| 1 | 000660 | KR7000660001 | SK하이닉스 | KOSPI |
| 2 | 373220 | KR7373220003 | LG에너지솔루션 | KOSPI |
| 3 | 005380 | KR7005380001 | 현대차 | KOSPI |
| 4 | 207940 | KR7207940008 | 삼성바이오로직스 | KOSPI |
mf_df['종목코드']=''for stock in mf_stock_list:
mf_df.loc[ mf_df['종목명']==stock, '종목코드']=krx[krx['Name']==stock]['Code'].values
mf_df| 종목명 | 시가총액 | 종목코드 | |
|---|---|---|---|
| 96 | HDC | 7468 | 012630 |
| 440 | SIMPAC | 1835 | 009160 |
| 459 | SK하이닉스 | 541634 | 000660 |
| 749 | 대한유화 | 7508 | 006650 |
| 1040 | 세아제강지주 | 2044 | 003030 |
| 1057 | 신대양제지 | 2756 | 016590 |
| 1157 | 에쓰씨엔지니어링 | 354 | 023960 |
| 1298 | 케이씨 | 1694 | 029460 |
| 1443 | 한일홀딩스 | 2904 | 003300 |
| 1513 | 효성 | 18353 | 004800 |
for x in mf_df['종목코드'].values :
df = fdr.DataReader( x, '2019-01-01', '2019-12-31' )
cum_ret = df.loc[df.index[-1], 'Close'] / df.loc[df.index[0], 'Close']-1
mf_df.loc[mf_df['종목코드']==x, '수익률'] = cum_ret
historical_max = df['Close'].cummax()
daily_drawdown = df['Close']/historical_max - 1.
MDD = daily_drawdown.min()
mf_df.loc[mf_df['종목코드']==x, '최대낙폭'] = MDD
df['daily_rtn'] = df['Close'].pct_change( periods = 1 )
VOL = df['daily_rtn'].std()*np.sqrt(252)
mf_df.loc[mf_df['종목코드']==x, '변동성'] = VOL
df = None
mf_df.sort_values('시가총액', ascending=False)| 종목명 | 시가총액 | 종목코드 | 수익률 | 최대낙폭 | 변동성 | |
|---|---|---|---|---|---|---|
| 459 | SK하이닉스 | 541634 | 000660 | 0.552805 | -0.228606 | 0.353011 |
| 1513 | 효성 | 18353 | 004800 | 0.649635 | -0.115124 | 0.300178 |
| 749 | 대한유화 | 7508 | 006650 | -0.203390 | -0.407609 | 0.346504 |
| 96 | HDC | 7468 | 012630 | -0.332326 | -0.504808 | 0.330864 |
| 1443 | 한일홀딩스 | 2904 | 003300 | -0.197424 | -0.363892 | 0.324106 |
| 1057 | 신대양제지 | 2756 | 016590 | 0.034783 | -0.388466 | 0.364506 |
| 1040 | 세아제강지주 | 2044 | 003030 | -0.008368 | -0.228353 | 0.240910 |
| 440 | SIMPAC | 1835 | 009160 | 0.206967 | -0.299566 | 0.407815 |
| 1298 | 케이씨 | 1694 | 029460 | 0.594017 | -0.369942 | 0.442983 |
| 1157 | 에쓰씨엔지니어링 | 354 | 023960 | -0.062092 | -0.479592 | 0.565059 |
7.2 Practice 2 : Mean reversion strategy
df = pd.read_csv('investment_hw/SPY.csv')
df.head()| Date | Open | High | Low | Close | Adj Close | Volume | |
|---|---|---|---|---|---|---|---|
| 0 | 1993-01-29 | 43.96875 | 43.96875 | 43.75000 | 43.93750 | 26.706757 | 1003200 |
| 1 | 1993-02-01 | 43.96875 | 44.25000 | 43.96875 | 44.25000 | 26.896694 | 480500 |
| 2 | 1993-02-02 | 44.21875 | 44.37500 | 44.12500 | 44.34375 | 26.953669 | 201300 |
| 3 | 1993-02-03 | 44.40625 | 44.84375 | 44.37500 | 44.81250 | 27.238594 | 529400 |
| 4 | 1993-02-04 | 44.96875 | 45.09375 | 44.46875 | 45.00000 | 27.352570 | 531500 |
df.describe()| Open | High | Low | Close | Adj Close | Volume | |
|---|---|---|---|---|---|---|
| count | 6648.000000 | 6648.000000 | 6648.000000 | 6648.000000 | 6648.000000 | 6.648000e+03 |
| mean | 133.762935 | 134.541071 | 132.893598 | 133.759854 | 110.399391 | 8.440122e+07 |
| std | 59.488006 | 59.671285 | 59.277882 | 59.492056 | 64.113369 | 9.837713e+07 |
| min | 43.343750 | 43.531250 | 42.812500 | 43.406250 | 26.383823 | 5.200000e+03 |
| 25% | 96.780937 | 97.735000 | 95.726562 | 96.921875 | 71.256485 | 6.966775e+06 |
| 50% | 124.433750 | 125.335938 | 123.500000 | 124.312500 | 93.641503 | 5.709990e+07 |
| 75% | 151.702503 | 152.514999 | 150.810624 | 151.791714 | 125.251474 | 1.229908e+08 |
| max | 296.040009 | 296.309998 | 293.760010 | 295.859985 | 294.427979 | 8.710263e+08 |
df.info()<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6648 entries, 0 to 6647
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 6648 non-null object
1 Open 6648 non-null float64
2 High 6648 non-null float64
3 Low 6648 non-null float64
4 Close 6648 non-null float64
5 Adj Close 6648 non-null float64
6 Volume 6648 non-null int64
dtypes: float64(5), int64(1), object(1)
memory usage: 363.7+ KB
df['Date']=pd.to_datetime(df.Date)
df.info()<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6648 entries, 0 to 6647
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 6648 non-null datetime64[ns]
1 Open 6648 non-null float64
2 High 6648 non-null float64
3 Low 6648 non-null float64
4 Close 6648 non-null float64
5 Adj Close 6648 non-null float64
6 Volume 6648 non-null int64
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 363.7 KB
price_df = df.loc[:,['Date', 'Adj Close']].copy()
price_df.head()| Date | Adj Close | |
|---|---|---|
| 0 | 1993-01-29 | 26.706757 |
| 1 | 1993-02-01 | 26.896694 |
| 2 | 1993-02-02 | 26.953669 |
| 3 | 1993-02-03 | 27.238594 |
| 4 | 1993-02-04 | 27.352570 |
price_df.set_index(['Date'], inplace=True)
price_df.head()| Adj Close | |
|---|---|
| Date | |
| 1993-01-29 | 26.706757 |
| 1993-02-01 | 26.896694 |
| 1993-02-02 | 26.953669 |
| 1993-02-03 | 27.238594 |
| 1993-02-04 | 27.352570 |
# 볼린저 밴드
# - 상단 밴드 : 중간밴드 + 2*20일 이동표준편차
# - 중간 밴드 : 20일 이동표준편차
# - 하단 밴드 : 중간밴드 - 2*20일 이동표준편차
n = 20
sigma = 2
def bollinger_band(price_df, n, sigma):
bb = price_df.copy()
bb['center']= price_df['Adj Close'].rolling(n).mean()
bb['ub']= bb['center'] + sigma * price_df['Adj Close'].rolling(window=n).std()
bb['lb']= bb['center'] - sigma * price_df['Adj Close'].rolling(window=n).std()
return bbbollinger = bollinger_band(price_df, n, sigma)
bollinger.tail()| Adj Close | center | ub | lb | |
|---|---|---|---|---|
| Date | ||||
| 2019-06-18 | 290.984741 | 282.981668 | 293.213256 | 272.750080 |
| 2019-06-19 | 291.641541 | 283.307582 | 294.219075 | 272.396089 |
| 2019-06-20 | 294.427979 | 283.816605 | 295.809022 | 271.824187 |
| 2019-06-21 | 294.000000 | 284.477884 | 297.200319 | 271.755449 |
| 2019-06-24 | 293.640015 | 285.089319 | 298.355028 | 271.823610 |
base_date = '2009-01-02'
sample = bollinger.loc[base_date:]
sample.head()| Adj Close | center | ub | lb | |
|---|---|---|---|---|
| Date | ||||
| 2009-01-02 | 75.099487 | 71.378963 | 74.538229 | 68.219698 |
| 2009-01-05 | 75.010582 | 71.711677 | 74.931608 | 68.491746 |
| 2009-01-06 | 75.511505 | 71.964058 | 75.543401 | 68.384716 |
| 2009-01-07 | 73.249435 | 71.980327 | 75.580937 | 68.379718 |
| 2009-01-08 | 73.548378 | 72.071645 | 75.736733 | 68.406557 |
def create_trade_book(sample):
book = sample[['Adj Close']].copy()
book['trade'] = ''
return bookdef tradings ( sample, book ):
for i in sample.index:
if sample.loc[i, 'Adj Close'] > sample.loc[i, 'ub']:
book.loc[i, 'trade']=''
elif sample.loc[i, 'lb'] > sample.loc[i, 'Adj Close']:
if book.shift(1).loc[i, 'trade']=='buy':
book.loc[i, 'trade']='buy'
else :
book.loc[i, 'trade']='buy'
elif (sample.loc[i, 'ub']>= sample.loc[i, 'Adj Close'] and
sample.loc[i, 'Adj Close']>=sample.loc[i, 'lb']):
if book.shift(1).loc[i, 'trade'] == 'buy':
book.loc[i, 'trade']='buy'
else :
book.loc[i, 'trade']=''
return bookbook = create_trade_book(sample)
book = tradings(sample, book)
book.tail(10)| Adj Close | trade | |
|---|---|---|
| Date | ||
| 2019-06-11 | 287.501678 | buy |
| 2019-06-12 | 286.994171 | buy |
| 2019-06-13 | 288.178375 | buy |
| 2019-06-14 | 287.859955 | buy |
| 2019-06-17 | 287.969391 | buy |
| 2019-06-18 | 290.984741 | buy |
| 2019-06-19 | 291.641541 | buy |
| 2019-06-20 | 294.427979 | buy |
| 2019-06-21 | 294.000000 | buy |
| 2019-06-24 | 293.640015 | buy |
def returns(book):
rtn = 1.0
book['return']=1
buy = 0.0
sell = 0.0
for i in book.index:
#long 진입
if book.loc[i, 'trade']=='buy' and book.shift(1).loc[i, 'trade']=='':
buy = book.loc[i, 'Adj Close']
print('진입일 :', i, '\n\t\t long 진입가격:', buy)
#long 청산
elif book.loc[i, 'trade']=='' and book.shift(1).loc[i, 'trade']=='buy':
sell = book.loc[i, 'Adj Close']
rtn = (sell-buy)/buy + 1 # 손익계산
book.loc[i, 'return']= rtn
print('청산일 :', i, '\n\t\t long 진입가격:', buy,
'long 청산가격:', sell, '\n\t\t| return :', round(rtn, 4))
if book.loc[i, 'trade']=='': # 제로 포지션
buy = sell = 0.0
book['acc return'] = book['return'].cumprod()
acc_rtn = book['acc return'][-1]
print( 'Accumulated return :', round( acc_rtn, 4))
return( round(acc_rtn, 4) )returns(book)진입일 : 2009-01-20 00:00:00
long 진입가격: 65.089966
청산일 : 2009-03-23 00:00:00
long 진입가격: 65.089966 long 청산가격: 66.898392
| return : 1.0278
진입일 : 2010-01-22 00:00:00
long 진입가격: 90.269791
청산일 : 2010-04-14 00:00:00
long 진입가격: 90.269791 long 청산가격: 100.584618
| return : 1.1143
진입일 : 2010-05-04 00:00:00
long 진입가격: 97.538597
청산일 : 2010-10-13 00:00:00
long 진입가격: 97.538597 long 청산가격: 98.862717
| return : 1.0136
진입일 : 2011-03-10 00:00:00
long 진입가격: 109.513054
청산일 : 2011-04-26 00:00:00
long 진입가격: 109.513054 long 청산가격: 114.094101
| return : 1.0418
진입일 : 2011-05-23 00:00:00
long 진입가격: 111.783257
청산일 : 2011-06-30 00:00:00
long 진입가격: 111.783257 long 청산가격: 112.26088
| return : 1.0043
진입일 : 2011-08-02 00:00:00
long 진입가격: 106.748672
청산일 : 2012-02-03 00:00:00
long 진입가격: 106.748672 long 청산가격: 115.768776
| return : 1.0845
진입일 : 2012-04-10 00:00:00
long 진입가격: 117.451515
청산일 : 2012-07-03 00:00:00
long 진입가격: 117.451515 long 청산가격: 119.371857
| return : 1.0164
진입일 : 2012-10-23 00:00:00
long 진입가격: 123.511292
청산일 : 2012-12-18 00:00:00
long 진입가격: 123.511292 long 청산가격: 126.961044
| return : 1.0279
진입일 : 2013-06-05 00:00:00
long 진입가격: 142.477417
청산일 : 2013-07-11 00:00:00
long 진입가격: 142.477417 long 청산가격: 148.711197
| return : 1.0438
진입일 : 2013-08-15 00:00:00
long 진입가격: 147.769791
청산일 : 2013-09-11 00:00:00
long 진입가격: 147.769791 long 청산가격: 150.45195
| return : 1.0182
진입일 : 2013-10-08 00:00:00
long 진입가격: 147.686783
청산일 : 2013-10-17 00:00:00
long 진입가격: 147.686783 long 청산가격: 154.594528
| return : 1.0468
진입일 : 2014-01-24 00:00:00
long 진입가격: 160.521667
청산일 : 2014-04-02 00:00:00
long 진입가격: 160.521667 long 청산가격: 170.233917
| return : 1.0605
진입일 : 2014-04-11 00:00:00
long 진입가격: 163.591492
청산일 : 2014-05-27 00:00:00
long 진입가격: 163.591492 long 청산가격: 172.613312
| return : 1.0551
진입일 : 2014-07-31 00:00:00
long 진입가격: 174.862244
청산일 : 2014-09-18 00:00:00
long 진입가격: 174.862244 long 청산가격: 182.768143
| return : 1.0452
진입일 : 2014-09-25 00:00:00
long 진입가격: 178.636536
청산일 : 2016-04-13 00:00:00
long 진입가격: 178.636536 long 청산가격: 195.218124
| return : 1.0928
진입일 : 2016-06-24 00:00:00
long 진입가격: 191.742584
청산일 : 2016-08-11 00:00:00
long 진입가격: 191.742584 long 청산가격: 206.280853
| return : 1.0758
진입일 : 2016-09-09 00:00:00
long 진입가격: 201.21463
청산일 : 2016-12-07 00:00:00
long 진입가격: 201.21463 long 청산가격: 212.964615
| return : 1.0584
진입일 : 2017-03-21 00:00:00
long 진입가격: 223.89772
청산일 : 2017-04-24 00:00:00
long 진입가격: 223.89772 long 청산가격: 227.193008
| return : 1.0147
진입일 : 2017-07-06 00:00:00
long 진입가격: 231.55455
청산일 : 2017-07-14 00:00:00
long 진입가격: 231.55455 long 청산가격: 236.377182
| return : 1.0208
진입일 : 2017-08-10 00:00:00
long 진입가격: 234.644501
청산일 : 2017-09-11 00:00:00
long 진입가격: 234.644501 long 청산가격: 239.890701
| return : 1.0224
진입일 : 2018-02-05 00:00:00
long 진입가격: 256.626129
청산일 : 2018-05-10 00:00:00
long 진입가격: 256.626129 long 청산가격: 265.551544
| return : 1.0348
진입일 : 2018-06-27 00:00:00
long 진입가격: 264.125763
청산일 : 2018-08-07 00:00:00
long 진입가격: 264.125763 long 청산가격: 280.040985
| return : 1.0603
진입일 : 2018-10-10 00:00:00
long 진입가격: 274.137573
청산일 : 2019-03-18 00:00:00
long 진입가격: 274.137573 long 청산가격: 280.96347
| return : 1.0249
진입일 : 2019-05-13 00:00:00
long 진입가격: 279.50058
Accumulated return : 2.6528
/var/folders/n2/jbh_0_091bx8qgz7j87t2qwc0000gp/T/ipykernel_19470/1068455384.py:15: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '1.027783483555668' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
book.loc[i, 'return']= rtn
/var/folders/n2/jbh_0_091bx8qgz7j87t2qwc0000gp/T/ipykernel_19470/1068455384.py:23: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
acc_rtn = book['acc return'][-1]
2.6528
book.tail()| Adj Close | trade | return | acc return | |
|---|---|---|---|---|
| Date | ||||
| 2019-06-18 | 290.984741 | buy | 1.0 | 2.652793 |
| 2019-06-19 | 291.641541 | buy | 1.0 | 2.652793 |
| 2019-06-20 | 294.427979 | buy | 1.0 | 2.652793 |
| 2019-06-21 | 294.000000 | buy | 1.0 | 2.652793 |
| 2019-06-24 | 293.640015 | buy | 1.0 | 2.652793 |
result = book['return'].resample('A').aggregate(
[np.prod, lambda x: (x-1).std() * np.sqrt(252) ])
result.columns = ['Return', 'Vol']
result/var/folders/n2/jbh_0_091bx8qgz7j87t2qwc0000gp/T/ipykernel_19470/2886312715.py:1: FutureWarning: 'A' is deprecated and will be removed in a future version, please use 'YE' instead.
result = book['return'].resample('A').aggregate(
/var/folders/n2/jbh_0_091bx8qgz7j87t2qwc0000gp/T/ipykernel_19470/2886312715.py:1: FutureWarning: The provided callable <function prod at 0x105f46dd0> is currently using SeriesGroupBy.prod. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "prod" instead.
result = book['return'].resample('A').aggregate(
| Return | Vol | |
|---|---|---|
| Date | ||
| 2009-12-31 | 1.027783 | 0.027783 |
| 2010-12-31 | 1.129393 | 0.115017 |
| 2011-12-31 | 1.046283 | 0.042032 |
| 2012-12-31 | 1.133016 | 0.090658 |
| 2013-12-31 | 1.112403 | 0.066348 |
| 2014-12-31 | 1.169581 | 0.093156 |
| 2015-12-31 | 1.000000 | 0.000000 |
| 2016-12-31 | 1.244337 | 0.132818 |
| 2017-12-31 | 1.059011 | 0.033853 |
| 2018-12-31 | 1.097132 | 0.069591 |
| 2019-12-31 | 1.024900 | 0.036083 |
book[(book['trade']=='buy') &
(book['trade'].shift(1)=='')].index.year.value_counts().sort_index()Date
2009 1
2010 2
2011 3
2012 2
2013 3
2014 4
2016 2
2017 3
2018 3
2019 1
Name: count, dtype: int64
sample[(book['trade']=='') & (book['trade'].shift(1)=='buy')].head()| Adj Close | center | ub | lb | |
|---|---|---|---|---|
| Date | ||||
| 2009-03-23 | 66.898392 | 60.247113 | 66.842427 | 53.651799 |
| 2010-04-14 | 100.584618 | 97.853165 | 100.219620 | 95.486710 |
| 2010-10-13 | 98.862717 | 96.240554 | 98.860432 | 93.620676 |
| 2011-04-26 | 114.094101 | 112.277610 | 114.046010 | 110.509210 |
| 2011-06-30 | 112.260880 | 109.184084 | 111.624576 | 106.743592 |